In [1]:
import vincent
import pandas as pd
from vincent import AxisProperties, PropertySet, ValueRef
from vincent import Map
vincent.core.initialize_notebook()
In [ ]:
incidents = pd.read_csv('sanfrancisco_incidents_summer_2014.csv')
Changing the column labels of the data set
In [ ]:
incidents.columns = ['Id'
,'Category'
,'Description'
,'DayOfWeek'
,'Date'
,'Time'
,'District'
,'Resolution'
,'Address'
,'Longitude'
,'Latitude'
,'Location'
,'PdId']
The date and time of incident are in two separate columns. Combining them into a DateTime column
In [2]:
# the date and time of incident are in two separate columns
# combining them into a date_time column
incidents['DateTime'] = pd.to_datetime(incidents['Date'] + ' ' + incidents['Time'])
date_idx = pd.DatetimeIndex(incidents['DateTime'])
incidents['Date'] = date_idx.date.astype('datetime64')
incidents['Hour'] = date_idx.hour
incidents['Year'] = date_idx.year
incidents['Month'] = date_idx.month
incidents['Weekday'] = date_idx.weekday
In [126]:
count_by_category = pd.DataFrame({'count' : incidents.groupby( ['Category'] ).size()}).reset_index()
count_by_category.sort_values(by='count',ascending= True,inplace=True)
index = count_by_category['Category']
graph = vincent.Bar(count_by_category,columns=['count'], key_on='Category')
graph.legend(title='Category')
graph.axis_titles(x='Category', y='Incident Count')
ax = AxisProperties(
labels=PropertySet(
angle=ValueRef(value=270),
align=ValueRef(value='right')
)
)
graph.axes[0].properties = ax
graph.display()
#ax = count_by_category.plot(kind="barh",x='Category', y='count',sort_columns=True)
In [121]:
by_year = incidents.pivot_table('Id'
, aggfunc='count'
, index='Weekday'
, columns='District')
In [130]:
graph = vincent.Line(by_year)
graph.legend(title='District')
graph.axis_titles(x='Weekday', y='Incident Count')
graph.display()
In [133]:
filtered = incidents[incidents['District'] == 'MISSION']
filtered = filtered[~filtered['Category'].isin(['LARCENY/THEFT'
,'NON-CRIMINAL'
,'OTHER OFFENSES'
,'WARRANTS'
,'DISORDERLY CONDUCT'])]
In [134]:
count_by_category = pd.DataFrame({'count' : filtered.groupby( ['Category'] ).size()}).reset_index()
count_by_category.sort_values(by='count',ascending= True,inplace=True)
index = count_by_category['Category']
graph = vincent.Bar(count_by_category,columns=['count'], key_on='Category')
graph.legend(title='Category')
graph.axis_titles(x='Category', y='Incident Count')
ax = AxisProperties(
labels=PropertySet(
angle=ValueRef(value=270),
align=ValueRef(value='right')
)
)
graph.axes[0].properties = ax
graph.display()
In [125]:
by_hour = filtered.pivot_table('Id'
, aggfunc='count'
, index='Hour'
, columns='Category')
graph = vincent.Line(by_hour)
graph.legend(title='Category')
graph.axis_titles(x='Hour', y='Incident Count')
graph.display()
In [135]:
filter_by_category = 'MISSING PERSON'
In [136]:
filtered = incidents[incidents['Category'] == filter_by_category]
by_hour = filtered.pivot_table('Id'
, aggfunc='count'
, index='Hour'
, columns='District')
graph = vincent.Line(by_hour) #,columns=['count'],key_on='District')
graph.legend(title='District')
graph.axis_titles(x='Hour', y='Incident Count')
graph.display()
In [138]:
count_by_category = pd.DataFrame({'count' : filtered.groupby( ['District'] ).size()}).reset_index()
count_by_category.sort_values(by='count',ascending= True,inplace=True)
index = count_by_category['District']
graph = vincent.Bar(count_by_category,columns=['count'], key_on='District')
graph.legend(title='District')
graph.axis_titles(x='District', y='Incident Count')
ax = AxisProperties(
labels=PropertySet(
angle=ValueRef(value=270),
align=ValueRef(value='right')
)
)
graph.axes[0].properties = ax
graph.display()
In [140]:
filter_by_districts = ['MISSION','SOUTHERN','PARK','BAYVIEW']
In [141]:
filtered = incidents[incidents['Category'] == filter_by_category]
filtered = filtered[filtered['District'].isin(filter_by_districts)]
#filtered = filtered[filtered['Weekday'] == 2]
by_hour = filtered.pivot_table('Id'
, aggfunc='count'
, index='Hour'
, columns='District')
graph = vincent.Line(by_hour) #,columns=['count'],key_on='District')
graph.legend(title='District')
graph.axis_titles(x='Hour', y='Incident Count')
graph.display()
In [144]:
filter_by_district = 'MISSION'
filter_by_category = 'MISSING PERSON'
filtered = incidents[incidents['Category'] == filter_by_category]
filtered = filtered[filtered['District'] == filter_by_district]
In [145]:
by_hour = filtered.pivot_table('Id'
, aggfunc='count'
, index='Hour'
, columns='District')
graph = vincent.Bar(by_hour) #,columns=['count'],key_on='District')
graph.legend(title='District')
graph.axis_titles(x='Hour', y='Incident Count')
graph.display()
In [146]:
by_weekday = filtered.pivot_table('Id'
, aggfunc='count'
, index='Weekday'
, columns='District')
graph = vincent.Bar(by_weekday)
graph.legend(title='District')
graph.axis_titles(x='Weekday', y='Incident Count')
graph.display()
In [114]:
def getExtent(data):
extentMin = [data['Latitude'].min(),data['Longitude'].min()]
extentMax = [data['Latitude'].max(),data['Longitude'].max()]
center = [data['Latitude'].mean(),data['Longitude'].mean()]
return (center,extentMin,extentMax)
In [115]:
center,extentMin,extentMax = getExtent(incidents)
center
Out[115]:
In [108]:
#filter_by_district = 'CENTRAL'
#filter_by_category = 'VEHICLE THEFT'
#filtered = filtered[filtered['Hour'] == 11]
#filtered = filtered[filtered['Hour'] < 19]
#filtered = filtered[filtered['Weekday'] == 4]
#filtered = incidents[incidents['Category'] == filter_by_category]
#filtered = filtered[filtered['District'] == filter_by_district]
In [ ]:
In [116]:
import folium
dots = filtered.as_matrix(columns=['Latitude','Longitude'])
map_osm = folium.Map(location=center, zoom_start=13, max_zoom=16)
map_osm.create_map(path='osm.html')
for dot in dots:
map_osm.simple_marker(dot)
map_osm
Out[116]:
In [ ]:
In [ ]:
incidents.head()
In [ ]: